#####################################
## MIDS application: replication   ##
## NOTE: computationally intensive ##
## portions of the code have been  ##
## commented out, with correspon-  ##
## ding output cached, for speed.  ##
## All code is seeded, however, to ##
## allow for full replication.     ##
#####################################

## Load required packages
library(NetMix)
library(splines)
library(ROCR)
library(auctestr)
library(stargazer)
library(forecast)

## Load data
load("Data/MIDdata.Rdata")


seeds <- c(703, 466, 325, 509, 536) 
## Estimate model from multiple
## random starting places, pick
## best model (cached)
# for(i in seeds){
#   f <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#                dist + peaceyrs + spline1 + spline2 + spline3,
#              formula.monad =  ~ polity + logNMC, 
#              data.dyad = MID_dyad, data.monad = MID_monad,
#              senderID = "country1", receiverID = "country2", 
#              nodeID = "country", timeID = "year",
#              n.blocks = 6,  n.hmmstates = 2, directed=FALSE,
#              mmsbm.control = list(verbose = TRUE,
#                                   svi=F,
#                                   nstart=1,
#                                   seed=i,
#                                   var_beta=25,
#                                   vi_iter=1500,
#                                   hessian=T))
#   f$seed <- i
#   eval(parse(text = paste("f_", i, "<- f", sep="")))
# }
# save(f_703, f_466, f_325, f_509, f_536, file="Cache/MIDs_seeds.Rdata")
load("Cache/MIDs_seeds.Rdata")
LB <- sapply(seeds, function(x){eval(parse(text=paste("f_", x, "$LowerBound", sep="")))})
names(LB) <- seeds
sort(LB) 

## Define best model
fit_onset <- f_466

## Relabel clusters for ease of discussion
fit_onset$MixedMembership <- fit_onset$MixedMembership[c(4,3,1,2,6,5),]
fit_onset$BlockModel <- fit_onset$BlockModel[c(4,3,1,2,6,5), c(4,3,1,2,6,5)]
fit_onset$MonadCoef <- fit_onset$MonadCoef[,c(4,3,1,2,6,5),]
fit_onset$CountMatrix <- fit_onset$CountMatrix[,c(4,3,1,2,6,5)]

summary(fit_onset)


#######################
## Main Text Results ##
#######################

## Figure 1
plot(fit_onset, type="blockmodel")
plot(fit_onset, type="groups")

## Figure 2
plot(fit_onset, type="membership")


## Figure 3
plot(fit_onset, type="membership", node="USA")
plot(fit_onset, type="membership", node="UK")
plot(fit_onset, type="membership", node="Russia")
plot(fit_onset, type="membership", node="Japan")
plot(fit_onset, type="membership", node="Cuba")
plot(fit_onset, type="membership", node="Iraq")


## Table 1
fit_onset$DyadCoef 
diag(fit_onset$vcov_dyad)
fit_onset$MonadCoef 
diag(fit_onset$vcov_monad)[c(13:15,9:11,1:3, 5:7,21:23,17:19)] # to match reordering



## Figure 4
p <- covFX(fm=fit_onset, cov="polity", shift=sd(fit_onset$monadic.data$polity), max.val=10) 
p[[1]] 
nodes <- sort(p[[3]])
names(nodes)[names(nodes)=="Egypt/UAR"] <- "Egypt"
names(nodes)[names(nodes)=="Germany East"] <- "East Germany"
names(nodes)[names(nodes)=="Korea North"] <- "North Korea"
names(nodes)[names(nodes)=="Libyan"] <- "Libya"
names(nodes)[names(nodes)=="FYROMacedonia"] <- "Macedonia"
names(nodes)[names(nodes)=="Brunei Darussalam"] <- "Brunei"

nodes <- nodes[names(nodes) %in% c("Kosovo", "Montenegro", "Brunei", "Malta", "Iceland", "Liberia", "Nicaragua",  
                                   "Israel", "Germany", "Ukraine", 
                                   "Russia", "France", "Japan", "Georgia", "Australia", "New Zealand",
                                   "India", "Netherlands", "Malaysia", "USA", "UK", "Indonesia", "Switzerland",
                                   "Sweden", "Finland", "Norway", "Poland", "Singapore", "Ireland", 
                                   "Czechoslovakia", "Austria", "Turkey", 
                                   "China", "Romania", "Pakistan", "South Africa", "Chile", "Spain",
                                   "Peru", "Lebanon", "Ecuador", "Croatia", "Cuba", "Costa Rica",
                                   "Brazil", "Lithuania", "Argentina", "Venezuela", "Belarus", "Israel", 
                                   "Yemen", "Iran", "Sierra Leone", "Ethiopia", "Afghanistan", "North Korea",
                                   "Morocco", "Iraq", 
                                   "Syria", "Sudan", "Jordan", "Libyan", "Saudi Arabia")]

plot(1, type="n", xlab="Node-Level Estimated Effect", ylab="", 
     xlim=c(min(nodes) - 0.0001, max(nodes) + 0.0008),
     ylim = c(0, length(nodes)), yaxt="n")
abline(v=0, col="red", lty=2)
for(i in 1:length(nodes)){
  points(nodes[i],i, pch=19)
  text(nodes[i] + 0.00001,i, names(nodes)[i], pos=4, cex=0.7)
}


## Figure 5
par(mfrow=c(2,3))
US <- fit_onset$dyadic.data$`(sid)`=="USA" | fit_onset$dyadic.data$`(rid)`=="USA"
US.dem <- tapply(p[[5]][US], fit_onset$dyadic.data$`(tid)`[US], mean)
plot(names(US.dem), US.dem, type="l", ylim=c(-0.035, 0.003), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="United States")
par(xpd = F) 
abline(h=0, lty=2)

UK <- fit_onset$dyadic.data$`(sid)`=="UK" | fit_onset$dyadic.data$`(rid)`=="UK"
UK.dem <- tapply(p[[5]][UK], fit_onset$dyadic.data$`(tid)`[UK], mean)
plot(names(UK.dem), UK.dem, type="l", ylim=c(-0.035, 0.003), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="UK")
par(xpd = F) 
abline(h=0, lty=2)

Rus <- fit_onset$dyadic.data$`(sid)`=="Russia" | fit_onset$dyadic.data$`(rid)`=="Russia"
Rus.dem <- tapply(p[[5]][Rus], fit_onset$dyadic.data$`(tid)`[Rus], mean)
plot(names(Rus.dem), Rus.dem, type="l", ylim=c(-0.035, 0.003), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Russia")
par(xpd = F) 
abline(h=0, lty=2)

Jap <- fit_onset$dyadic.data$`(sid)`=="Japan" | fit_onset$dyadic.data$`(rid)`=="Japan"
Jap.dem <- tapply(p[[5]][Jap], fit_onset$dyadic.data$`(tid)`[Jap], mean)
plot(names(Jap.dem), Jap.dem, type="l", ylim=c(-0.035, 0.003), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Japan")
par(xpd = F) 
abline(h=0, lty=2)

Cuba <- fit_onset$dyadic.data$`(sid)`=="Cuba" | fit_onset$dyadic.data$`(rid)`=="Cuba"
Cuba.dem <- tapply(p[[5]][Cuba], fit_onset$dyadic.data$`(tid)`[Cuba], mean)
plot(names(Cuba.dem), Cuba.dem, type="l",  ylim=c(-0.035, 0.003), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Cuba")
par(xpd = F) 
abline(h=0, lty=2)

Iraq <- fit_onset$dyadic.data$`(sid)`=="Iraq" | fit_onset$dyadic.data$`(rid)`=="Iraq"
Iraq.dem <- tapply(p[[5]][Iraq], fit_onset$dyadic.data$`(tid)`[Iraq], mean)
plot(names(Iraq.dem), Iraq.dem, type="l",  ylim=c(-0.035, 0.003), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Iraq")
par(xpd = F) 
abline(h=0, lty=2)

## Examine shift in Russia's estimated effect of polity, 1918 vs 1922
R18 <- which(fit_onset$monadic.data$`(nid)`=="Russia" & fit_onset$monadic.data$`(tid)`==1918)
R22 <- which(fit_onset$monadic.data$`(nid)`=="Russia" & fit_onset$monadic.data$`(tid)`==1922)
newdat <- fit_onset$monadic.data
newdat$polity[R18] <- fit_onset$monadic.data$polity[R18] + sd(fit_onset$monadic.data$polity)
newdat$polity[R18] <- fit_onset$monadic.data$polity[R18] + sd(fit_onset$monadic.data$polity)
newdat$polity[R22] <- fit_onset$monadic.data$polity[R22] + sd(fit_onset$monadic.data$polity)

R_mm <- predict(fit_onset, new.data.monad=newdat, type="mm")
fit_onset$MixedMembership[,R18] # observed 1918 membership vector
R_mm[,R18] # estimated 1918 membership vector with polity increase

fit_onset$MixedMembership[,R22] # observed 1922 membership vector
R_mm[,R22] # estimated 1922 membership vector with polity increase


## Figure 6
par(mfrow=c(1,1))
time.avg <- tapply(p[[5]], fit_onset$dyadic.data[,"(tid)"], mean)
time <- 1816:2010
plot(time, time.avg, type="p", ylim=c(-0.01, 0.005),
     xlab="", ylab="Effect of Polity on Pr(Edge Formation)")
abline(h=0, lty=2)


######################
## Appendix Results ##
######################

## Forecasting comparisons: estimate on 1816-2008, forecast 2009-2010
MID_monad2 <- MID_monad[MID_monad$year <= 2008,]
MID_dyad2 <- MID_dyad[MID_dyad$year <= 2008,]

## compare dynMMSBM with different numbers of latent groups

## Estimate models with different numbers of latent blocks (cached)
# M2 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = MID_dyad2, data.monad = MID_monad2,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 2,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(svi=F, nstart=1, verbose=T,
#                                  seed=466, var_beta=25,
#                                  vi_iter=1500, hessian=F))
# 
# M3 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = MID_dyad2, data.monad = MID_monad2,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 3,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(svi=F, nstart=1, verbose=T,
#                                  seed=466, var_beta=25,
#                                  vi_iter=1500, hessian=F))
# 
# M4 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = MID_dyad2, data.monad = MID_monad2,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 4,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(svi=F, nstart=1, verbose=T,
#                                  seed=466, var_beta=25,
#                                  vi_iter=1500, hessian=F))
# 
# M5 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = MID_dyad2, data.monad = MID_monad2,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 5,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(svi=F, nstart=1, verbose=T,
#                                  seed=466, var_beta=25,
#                                  vi_iter=1500, hessian=F))
# 
# M6 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = MID_dyad2, data.monad = MID_monad2,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 6,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(verbose = TRUE,
#                                  svi=F,
#                                  nstart=1,
#                                  seed=466,
#                                  var_beta=25,
#                                  vi_iter=1500,
#                                  hessian=F))
# 
# M7 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = MID_dyad2, data.monad = MID_monad2,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 7,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(svi=F, nstart=1, verbose=T,
#                                  seed=466, var_beta=25,
#                                  vi_iter=1500, hessian=F))
load("Cache/MIDS_robustness.Rdata")


## Forecast, then compare area under the ROC curve (AUROC) for each specification
# We first forecast 2009 MIDs, then impute peace years and splines for 2010, then forecast 2010
MID_dyad_forecast <- MID_dyad
MID_dyad_forecast$IGOmems_joint_missing <- ifelse(is.na(MID_dyad_forecast$IGOmems_joint), 1, 0)
MID_dyad_forecast$IGOmems_joint[MID_dyad_forecast$IGOmems_joint_missing==1] <- 0
MID_dyad_forecast$dist_missing <- ifelse(is.na(MID_dyad_forecast$dist), 1, 0)
MID_dyad_forecast$dist[MID_dyad_forecast$dist_missing==1] <- 0
MID_monad_forecast <- MID_monad
MID_monad_forecast$polity_missing <- ifelse(is.na(MID_monad_forecast$polity), 1, 0)
MID_monad_forecast$polity[is.na(MID_monad_forecast$polity)] <- 0

MID.2009 <- MID_dyad_forecast[MID_dyad_forecast$year==2009,]
MID.monad.2009 <- MID_monad_forecast[MID_monad_forecast$year==2009,]
for(i in 2:7){
  mod <- eval(parse(text=paste("M", i, sep="")))
  forecast2009 <- predict(mod, new.data.dyad=MID.2009, new.data.monad=MID.monad.2009, 
                          type="response", forecast=T, parametric_mm=T)
  r2009 <- rbinom(nrow(MID.2009), 1, forecast2009)
  MID.new <- MID_dyad_forecast[MID_dyad_forecast$year %in% 2009:2010,]
  MID.new$peaceyrs[MID.new$year==2010] <- MID.new$peaceyrs[MID.new$year==2009] + 1 
  MID.new$peaceyrs[MID.new$year==2010 & MID.new$dyadID %in% MID.2009$dyadID[which(r2009==1)]] <- 0
  splines <- bs(MID.new$peaceyrs)
  MID.new[,c("spline1", "spline2", "spline3")] <- splines
  forecast2010 <- predict(mod, new.data.dyad=MID.new[MID.new$year==2010,], 
                          new.data.monad=MID_monad_forecast[MID_monad_forecast$year==2010,], 
                          type="response", forecast=T, parametric_mm=T)
  eval(parse(text=paste("forecast_", i, " <- c(forecast2009, forecast2010)", sep="")))
}

auc.vals <- sapply(2:7, function(x){
  eval(parse(text=paste("performance(prediction(forecast_", x, 
                        ", labels=MID.new$MID_onset_DY), measure = 'auc')@y.values", sep="")))
})
np <- sum(MID.new$MID_onset_DY)
nn <- sum(MID.new$MID_onset_DY==0)
auc.ses <- sapply(2:7, function(x){
  eval(parse(text=paste("se_auc(auc.vals[[", x-1, "]], np, nn)", sep="")))
})

## Table SI2 (forecast accuracy, different numbers of latent groups)
stargazer(cbind(c(2,"",3,"",4,"",5,"",6,"",7,""),
        c(rbind(round(unlist(auc.vals),3), paste("(",round(auc.ses, 3),")",sep="")))))


## Supplementary Information: specification with regional indicators (cached)
# fit_onset_region <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#                             dist + peaceyrs + spline1 + spline2 + spline3,
#                           formula.monad =  ~ polity + logNMC + state_region, 
#                           data.dyad = MID_dyad, data.monad = MID_monad,
#                           senderID = "country1", receiverID = "country2", 
#                           nodeID = "country", timeID = "year",
#                           n.blocks = 6,  n.hmmstates = 2, directed=FALSE,
#                           mmsbm.control = list(verbose = TRUE,
#                                                svi=F,
#                                                nstart=1,
#                                                seed=466,
#                                                var_beta=25,
#                                                vi_iter=1500,
#                                                hessian=T))

# relabel clusters for ease of comparison
loss.mat <- fit_onset_region$MixedMembership %*% t(fit_onset$MixedMembership)
right.perm <- clue::solve_LSAP(t(loss.mat), TRUE)
fit_onset_region$MixedMembership <- fit_onset_region$MixedMembership[right.perm,]
fit_onset_region$BlockModel <- fit_onset_region$BlockModel[right.perm, right.perm]
fit_onset_region$MonadCoef <- fit_onset_region$MonadCoef[,right.perm,]
fit_onset_region$CountMatrix <- fit_onset_region$CountMatrix[,right.perm]

cor(c(fit_onset_region$MixedMembership), c(fit_onset$MixedMembership)) 
cor(c(fit_onset_region$BlockModel), c(fit_onset$BlockModel)) 


## Figure SI5 (estimated blockmodel, regional model)
plot(fit_onset_region, type="blockmodel")
plot(fit_onset_region, type="groups")


## Figure SI6 (evolution of group membership, regional model)
plot(fit_onset_region, type="membership")


## Table SI3 (states with highest membership in latent groups for regional model, Cold War period)
h <- head(fit_onset_region, t=1950:1990, n=15) 
hd <- data.frame(matrix(paste(round(unlist(h), 3), names(unlist(h)), sep=" "), nrow=length(h), byrow=T))
ht <- rbind(t(hd[1:3,]),t (hd[4:6,]))
rownames(ht) <- NULL
stargazer(ht)


## Figure SI7 (5-cluster specification)
plot(M5, type="blockmodel")
plot(M5, type="groups")


## Figure SI8 (7-cluster specification)
plot(M7, type="blockmodel")
plot(M7, type="groups")


## Table SI4 (estimated rates of conflict in blockmodel)
stargazer(plogis(fit_onset$BlockModel))


## Table SI5 (states with highest membership in latent groups, Cold War period)
h <- head(fit_onset, t=1950:1990, n=15) 
hd <- data.frame(matrix(paste(round(unlist(h), 3), names(unlist(h)), sep=" "), nrow=length(h), byrow=T))
ht <- rbind(t(hd[1:3,]),t (hd[4:6,]))
rownames(ht) <- NULL
stargazer(ht)


## Table SI6: coefficients for Markov State 2
fit_onset$MonadCoef[,,2]
diag(fit_onset$vcov_monad)[c(13:15,9:11,1:3, 5:7,21:23,17:19)+24] # to match reordering


## Estimation via online/real-time updating (cached)
# we use expanding 5-year windows
#MID_monad1820 <- MID_monad[MID_monad$year <= 1820,]
#MID_dyad1820 <- MID_dyad[MID_dyad$year <= 1820,]
#f1820 <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#                 dist + peaceyrs + spline1 + spline2 + spline3,
#               formula.monad =  ~ polity + logNMC, 
#               data.dyad = MID_dyad1820, data.monad = MID_monad1820,
#               senderID = "country1", receiverID = "country2", 
#               nodeID = "country", timeID = "year",
#               n.blocks = 6,  n.hmmstates = 2, directed=FALSE,
#               mmsbm.control = list(verbose = TRUE,
#                                    svi=F,
#                                    nstart=1,
#                                    seed=466,
#                                    var_beta=25,
#                                    vi_iter=1500,
#                                    hessian=F))
#w <- seq(1820, 2010, by=5)
#for(i in 2:length(w)){
#  dat.dyad <- MID_dyad[MID_dyad$year <= w[i],]
#  dat.monad <- MID_monad[MID_monad$year <= w[i],]
#  prevMM <- eval(parse(text=paste("f", w[i-1], "$MixedMembership", sep="")))
  
#  Hessian <- ifelse(w==2010, "F", "T")
#  f <- mmsbm(formula.dyad = MID_onset_DY ~ IGOmems_joint + ally + contiguity + 
#               dist + peaceyrs + spline1 + spline2 + spline3,
#             formula.monad =  ~ polity + logNMC, 
#             data.dyad = dat.dyad, data.monad = dat.monad,
#             senderID = "country1", receiverID = "country2", 
#             nodeID = "country", timeID = "year",
#             n.blocks = 6,  n.hmmstates = 2, directed=FALSE,
#             mmsbm.control = list(verbose = T,
#                                  svi=F,
#                                  nstart=1,
#                                  seed=466,
#                                  var_beta=25,
#                                  vi_iter=1500,
#                                  mm_init_t=prevMM,
#                                  fixed_mm=colnames(prevMM),
#                                  hessian=Hessian))
#  eval(parse(text=paste("f", w[i], " <- f", sep="")))
#}
loss.mat <- f2010$MixedMembership %*% t(fit_onset$MixedMembership)
right.perm <- clue::solve_LSAP(t(loss.mat), TRUE)
f2010$MixedMembership <- f2010$MixedMembership[right.perm,]
f2010$BlockModel <- f2010$BlockModel[right.perm, right.perm]
f2010$MonadCoef <- f2010$MonadCoef[,right.perm,]
f2010$CountMatrix <- f2010$CountMatrix[,right.perm]

cor(c(f2010$MixedMembership), c(fit_onset$MixedMembership))


## Figure SI9 (evolution of group membership, real-time updating model)
plot.mmsbm(f2010, type="membership")


## Figure SI10 (average node membership over time, real-time updating model)
plot(f2010, type="membership", node="USA")
plot(f2010, type="membership", node="UK")
plot(f2010, type="membership", node="Russia")
plot(f2010, type="membership", node="Japan")
plot(f2010, type="membership", node="Cuba")
plot(f2010, type="membership", node="Iraq")


## Table SI7 (estimated coefficients, real-time updating model)
f2010$DyadCoef 
diag(f2010$vcov_dyad)
f2010$MonadCoef[,,1]
diag(f2010$vcov_monad)[c(21:23,9:11,5:7,13:15,17:19,1:3)]


## Estimate effect of decrease in polity score 
predict.ties <- predict(fit_onset, type = "response")
monadic.data2 <- fit_onset$monadic.data
monadic.data2[, "polity"] <- fit_onset$monadic.data[, "polity"] - sd(fit_onset$monadic.data$polity)
monadic.data2[which(monadic.data2[, "polity"] == -10), "polity"] <- -10
predict.ties2 <- predict(fit_onset, new.data.monad = monadic.data2, type = "response")
p2 <- list(mean(predict.ties2 - predict.ties), 
           tapply(predict.ties2 - predict.ties, fit_onset$dyadic.data[, "(tid)"], mean), 
           sapply(unique(fit_onset$monadic.data[, "(nid)"]), function(x) {
             mean((predict.ties2 - predict.ties)[fit_onset$dyadic.data[, "(sid)"] == x | fit_onset$dyadic.data[, "(rid)"] == x])}), 
           tapply(predict.ties2 - predict.ties, paste(fit_onset$dyadic.data[,"(sid)"], 
                                                      fit_onset$dyadic.data[, "(rid)"], sep = "_"), mean),
           predict.ties2 - predict.ties)
names(p2[[3]]) <- unique(fit_onset$monadic.data[, "(nid)"])
names(p2[[5]]) <- paste(fit_onset$dyadic.data[, "(sid)"], 
                        fit_onset$dyadic.data[, "(rid)"], sep = "_")

## Figure SI12 (effect of decrease in polity over time)
par(mfrow=c(1,1))
time.avg <- tapply(p2[[5]], fit_onset$dyadic.data[,"(tid)"], mean)
time <- 1816:2010
plot(time, time.avg, type="p", ylim=c(-0.005, 0.01),
     xlab="", ylab="Effect of Decrease in Polity on Pr(Edge Formation)")
abline(h=0, lty=2)


## Figure SI13 (effect of decrease in polity for select states)
par(mfrow=c(2,3))
US <- fit_onset$dyadic.data$`(sid)`=="USA" | fit_onset$dyadic.data$`(rid)`=="USA"
US.dem <- tapply(p2[[5]][US], fit_onset$dyadic.data$`(tid)`[US], mean)
plot(names(US.dem), US.dem, type="l", ylim=c(-0.003, 0.02), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="United States")
par(xpd = F) 
abline(h=0, lty=2)

UK <- fit_onset$dyadic.data$`(sid)`=="UK" | fit_onset$dyadic.data$`(rid)`=="UK"
UK.dem <- tapply(p2[[5]][UK], fit_onset$dyadic.data$`(tid)`[UK], mean)
plot(names(UK.dem), UK.dem, type="l", ylim=c(-0.003, 0.02), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="UK")
par(xpd = F) 
abline(h=0, lty=2)

Rus <- fit_onset$dyadic.data$`(sid)`=="Russia" | fit_onset$dyadic.data$`(rid)`=="Russia"
Rus.dem <- tapply(p2[[5]][Rus], fit_onset$dyadic.data$`(tid)`[Rus], mean)
plot(names(Rus.dem), Rus.dem, type="l", ylim=c(-0.003, 0.02), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Russia")
par(xpd = F) 
abline(h=0, lty=2)

Jap <- fit_onset$dyadic.data$`(sid)`=="Japan" | fit_onset$dyadic.data$`(rid)`=="Japan"
Jap.dem <- tapply(p2[[5]][Jap], fit_onset$dyadic.data$`(tid)`[Jap], mean)
plot(names(Jap.dem), Jap.dem, type="l", ylim=c(-0.003, 0.02), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Japan")
par(xpd = F) 
abline(h=0, lty=2)

Cuba <- fit_onset$dyadic.data$`(sid)`=="Cuba" | fit_onset$dyadic.data$`(rid)`=="Cuba"
Cuba.dem <- tapply(p2[[5]][Cuba], fit_onset$dyadic.data$`(tid)`[Cuba], mean)
plot(names(Cuba.dem), Cuba.dem, type="l",  ylim=c(-0.003, 0.02), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Cuba")
par(xpd = F) 
abline(h=0, lty=2)

Iraq <- fit_onset$dyadic.data$`(sid)`=="Iraq" | fit_onset$dyadic.data$`(rid)`=="Iraq"
Iraq.dem <- tapply(p2[[5]][Iraq], fit_onset$dyadic.data$`(tid)`[Iraq], mean)
plot(names(Iraq.dem), Iraq.dem, type="l",  ylim=c(-0.003, 0.02), xlim=c(1816, 2010),
     xlab="", ylab="Effect of Polity", main="Iraq")
par(xpd = F) 
abline(h=0, lty=2)


## Compare forecasting performance: 6-cluster dynMMSBM with logit
MID_dyad_forecast$polity_low <- ifelse(MID_dyad_forecast$polity_1 <= MID_dyad_forecast$polity_2, MID_dyad_forecast$polity_1, MID_dyad_forecast$polity_2)
MID_dyad_forecast$polity_high <- ifelse(MID_dyad_forecast$polity_1 > MID_dyad_forecast$polity_2, MID_dyad_forecast$polity_1, MID_dyad_forecast$polity_2)

logit.2008 <- glm(MID_onset_DY ~ IGOmems_joint + ally + contiguity + dist + 
                    polity_low + polity_high + cinc_ratio + 
                    peaceyrs + spline1 + spline2 + spline3 + 
                    IGOmems_joint_missing + dist_missing +  polity_missing,
                  data = MID_dyad_forecast[MID_dyad_forecast$year <= 2008,], family = binomial(link = "logit"))

MID.2009 <- MID_dyad_forecast[MID_dyad_forecast$year==2009,]
predict9.logit <- predict(logit.2008, newdata=MID.2009, type="response")
r9.logit <- rbinom(1:nrow(MID.2009), 1, predict9.logit)
MID.new <- MID_dyad_forecast[MID_dyad_forecast$year %in% 2009:2010,]
MID.new$peaceyrs[MID.new$year==2010] <- MID.new$peaceyrs[MID.new$year==2009] + 1
MID.new$peaceyrs[MID.new$year==2010 & MID.new$dyadID %in% MID.2009$dyadID[which(r9.logit==1)]] <- 0
MID.new[,c("spline1", "spline2", "spline3")] <- bs(MID.new$peaceyrs)
predict10.logit <- predict(logit.2008, newdata=MID.new[MID.new$year==2010,], type="response")
logit.forecast <- c(predict9.logit, predict10.logit)

## Compare forecasting accuracy with Diebold-Mariano tests
dm.test(MID.new$MID_onset_DY - forecast_6, MID.new$MID_onset_DY - logit.forecast)  

## Figure SI13 (ROC curves)
roc6 <- prediction(forecast_6, labels=MID.new$MID_onset_DY)
roc.logit <- prediction(logit.forecast, labels=MID.new$MID_onset_DY)

par(mfrow=c(1,1))
plot(unlist(attributes(performance(roc6, measure="tpr", x.measure="fpr"))["x.values"]), 
     unlist(attributes(performance(roc6, measure="tpr", x.measure="fpr"))["y.values"]), 
     type="l", lwd=2,
     xlab="False Positive Rate",
     ylab="True Positive Rate")
lines(unlist(attributes(performance(roc.logit, measure="tpr", x.measure="fpr"))["x.values"]), 
      unlist(attributes(performance(roc.logit, measure="tpr", x.measure="fpr"))["y.values"]), 
      type="l",
      col="red", lwd=2)
lines(c(0, 1), c(0,1), lty=2)
legend(x=0.7, y=0.2, legend=c("dynMMSBM","logit"),
       col=c("black", "red"), lty=1, lwd=2)


## Area under ROC curves, dynMMSBM vs Logit
auc.l <- performance(prediction(logit.forecast, labels=MID.new$MID_onset_DY), measure = 'auc')@y.values[[1]] 
auc.se.l <- se_auc(auc.l[[1]], np, nn)

auc.vals <- sapply(2:7, function(x){
  eval(parse(text=paste("performance(prediction(forecast_", x, 
                        ", labels=MID.new$MID_onset_DY), measure = 'auc')@y.values", sep="")))
})
np <- sum(MID.new$MID_onset_DY)
nn <- sum(MID.new$MID_onset_DY==0)
auc.ses <- sapply(2:7, function(x){
  eval(parse(text=paste("se_auc(auc.vals[[", x-1, "]], np, nn)", sep="")))
})

## Table SI8
stargazer(cbind(c("dynMMSBM","","","Logit","",""),
      c(round(auc.vals[[5]],3),round(auc.ses[[5]],3),"",round(auc.l,3),round(auc.se.l,3),"")))
      




